## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

Read data

source(paste0(root.dir, '../step2/scripts/myRLib.R'))
intermediate.files <- strsplit('../step2/output-direction_XT_YI/fastInsulin__wtccc_t2d__wtccc_ctrl/covar_LDpred_p1.0000e-03.txt:../step2/output-direction_XT_YI/fastInsulin__wtccc_t2d__wtccc_ctrl/covar_LDpred_p3.0000e-04.txt:../step2/output-direction_XT_YI/fastInsulin__wtccc_t2d__wtccc_ctrl/covar_LDpred_p1.0000e-04.txt', ':')[[1]]
ps <- strsplit('0.001,0.0003,0.0001', ',')[[1]]
expression.files <- strsplit('output-run_step3/fastInsulin__wtccc_t2d__wtccc_ctrl/Artery_Coronary_predicted_expression.txt:output-run_step3/fastInsulin__wtccc_t2d__wtccc_ctrl/DGN-HapMap-2015_predicted_expression.txt:output-run_step3/fastInsulin__wtccc_t2d__wtccc_ctrl/Whole_Blood_predicted_expression.txt:output-run_step3/fastInsulin__wtccc_t2d__wtccc_ctrl/Cells_EBV-transformed_lymphocytes_predicted_expression.txt:output-run_step3/fastInsulin__wtccc_t2d__wtccc_ctrl/Thyroid_predicted_expression.txt:output-run_step3/fastInsulin__wtccc_t2d__wtccc_ctrl/Stomach_predicted_expression.txt:output-run_step3/fastInsulin__wtccc_t2d__wtccc_ctrl/Small_Intestine_Terminal_Ileum_predicted_expression.txt:output-run_step3/fastInsulin__wtccc_t2d__wtccc_ctrl/Esophagus_Muscularis_predicted_expression.txt:output-run_step3/fastInsulin__wtccc_t2d__wtccc_ctrl/Spleen_predicted_expression.txt:output-run_step3/fastInsulin__wtccc_t2d__wtccc_ctrl/Pancreas_predicted_expression.txt:output-run_step3/fastInsulin__wtccc_t2d__wtccc_ctrl/Esophagus_Gastroesophageal_Junction_predicted_expression.txt:output-run_step3/fastInsulin__wtccc_t2d__wtccc_ctrl/Adrenal_Gland_predicted_expression.txt:output-run_step3/fastInsulin__wtccc_t2d__wtccc_ctrl/Adipose_Visceral_Omentum_predicted_expression.txt:output-run_step3/fastInsulin__wtccc_t2d__wtccc_ctrl/Adipose_Subcutaneous_predicted_expression.txt:output-run_step3/fastInsulin__wtccc_t2d__wtccc_ctrl/Pituitary_predicted_expression.txt:output-run_step3/fastInsulin__wtccc_t2d__wtccc_ctrl/Colon_Transverse_predicted_expression.txt:output-run_step3/fastInsulin__wtccc_t2d__wtccc_ctrl/Esophagus_Mucosa_predicted_expression.txt:output-run_step3/fastInsulin__wtccc_t2d__wtccc_ctrl/Liver_predicted_expression.txt:output-run_step3/fastInsulin__wtccc_t2d__wtccc_ctrl/Lung_predicted_expression.txt:output-run_step3/fastInsulin__wtccc_t2d__wtccc_ctrl/Muscle_Skeletal_predicted_expression.txt:output-run_step3/fastInsulin__wtccc_t2d__wtccc_ctrl/Cells_Transformed_fibroblasts_predicted_expression.txt', ':')[[1]]
ts <- strsplit('Artery_Coronary,DGN-HapMap-2015,Whole_Blood,Cells_EBV-transformed_lymphocytes,Thyroid,Stomach,Small_Intestine_Terminal_Ileum,Esophagus_Muscularis,Spleen,Pancreas,Esophagus_Gastroesophageal_Junction,Adrenal_Gland,Adipose_Visceral_Omentum,Adipose_Subcutaneous,Pituitary,Colon_Transverse,Esophagus_Mucosa,Liver,Lung,Muscle_Skeletal,Cells_Transformed_fibroblasts', ',')[[1]]
gene <- read.table(getFile(root.dir, 'output-prepare_gene_id/fastInsulin__wtccc_t2d__wtccc_ctrl.genelist'), header = F, sep = '\t')
colnames(gene) <- c('gene.name', 'gencode.id')
yi <- read.table(getFile(root.dir, '../step2/output-direction_XT_YI/fastInsulin__wtccc_t2d__wtccc_ctrl/yi.logistic.assoc'), header = T)
cols <- colnames(yi)
yi.filenames <- apply(yi, 1, function(x) { return(basename(x[1]))})
yi.container <- c()
expression.list <- list()
for(i in 1 : length(ts)) {
    expression.list[[ts[i]]] <- read.table(getFile(root.dir, expression.files[i]), header = T, sep = '  ')
}
intermediate.list <- list()
for(i in 1 : length(ps)) {
    thisfile <- basename(intermediate.files[i])
    yi.container <- rbind(yi.container, c(ps[i], yi[yi.filenames == thisfile, 2:7]))
    intermediate.list[[ps[i]]] <- read.table(getFile(root.dir, intermediate.files[i]), header = T, sep = ' ')
    intermediate.list[[ps[i]]]$PHENO <- intermediate.list[[ps[i]]]$PHENO - 1
}
yi.container <- data.frame(yi.container)
colnames(yi.container) <- c('causal.fraction', cols[2:7])
pander(gene, knitr.auto.asis = T)
gene.name gencode.id
RAB38 ENSG00000123892.7
UHRF1 ENSG00000034063.9
UHRF1BP1 ENSG00000065060.12
UHRF1BP1L ENSG00000111647.8
ANKS1A ENSG00000064999.12
HRH1 ENSG00000196639.6
LGR5 ENSG00000139292.8
TSPAN8 ENSG00000127324.4

Logistic regression with interaction term \(Y \sim E + I + I*E\)

yei.container <- c()
for (i in 1:length(intermediate.list)) {
    cat("##", ps[i])
    cat("\n\n")
    
    inter <- intermediate.list[[ps[i]]]
    temp.container <- c()
    for (t in names(expression.list)) {
        expre <- expression.list[[t]]
        df <- inner_join(inter, expre, by = "IID")
        for (g in gene[, 2]) {
            if (sum(abs(expre[, g])) != 0) {
                this.expre <- scale(expre[, g])
            } else {
                this.expre <- expre[, g]
            }
            data <- data.frame(y = inter$PHENO, i = inter$PRS, 
                e = this.expre)
            model <- glm(y ~ e + i, family = binomial(link = "logit"), 
                data = data)
            stats <- summary(model)
            temp.container <- rbind(temp.container, c(ps[i], 
                t, g, stats$coefficients[2], stats$coefficients[5], 
                stats$coefficients[11], "Y ~ E + I", "E"))
            temp.container <- rbind(temp.container, c(ps[i], 
                t, g, stats$coefficients[3], stats$coefficients[6], 
                stats$coefficients[12], "Y ~ E + I", "I"))
            model2 <- glm(y ~ e + i + e * i, family = binomial(link = "logit"), 
                data = data)
            stats2 <- summary(model2)
            model3 <- lm(i ~ e, data = data)
            stats3 <- summary(model3)
            temp.container <- rbind(temp.container, c(ps[i], 
                t, g, stats2$coefficients[2], stats2$coefficients[6], 
                stats2$coefficients[14], "Y ~ E + I + E*I", "E"))
            temp.container <- rbind(temp.container, c(ps[i], 
                t, g, stats2$coefficients[3], stats2$coefficients[7], 
                stats2$coefficients[15], "Y ~ E + I + E*I", "I"))
            temp.container <- rbind(temp.container, c(ps[i], 
                t, g, stats2$coefficients[4], stats2$coefficients[8], 
                stats2$coefficients[16], "Y ~ E + I + E*I", "I*E"))
            temp.container <- rbind(temp.container, c(ps[i], 
                t, g, stats3$coefficients[2], stats3$coefficients[4], 
                stats3$coefficients[8], "I ~ E", "E (on I)"))
        }
    }
    temp.container <- data.frame(temp.container)
    colnames(temp.container) <- c("causal.fraction", "tissue", 
        "gene", "estmate", "std", "pval", "type", "term")
    temp.container <- left_join(temp.container, gene, by = c(gene = "gencode.id"))
    temp.container <- cleanDF(temp.container, c("estmate", "std", 
        "pval"))
    
    plot.e <- temp.container[temp.container$term == "E", ]
    ncol <- length(unique(plot.e[!is.na(plot.e$pval), "gene.name"]))/3
    if (ncol == floor(ncol)) {
        ncol <- floor(ncol)
    } else {
        ncol <- floor(ncol) + 1
    }
    dodge <- position_dodge(width = 0.5)
    p2 <- ggplot(plot.e[!is.na(plot.e$pval), ], aes(x = tissue, 
        color = type)) + geom_point(aes(y = estmate), position = dodge) + 
        geom_errorbar(aes(ymin = estmate - 1.96 * std, ymax = estmate + 
            1.96 * std), width = 0.1, position = dodge) + geom_hline(yintercept = 0, 
        color = "black") + coord_flip() + theme(axis.text.x = element_text(angle = 90, 
        hjust = 1)) + geom_text(aes(y = estmate, label = paste0("pval = ", 
        formatC(pval, digits = 2, format = "e")), vjust = -0.3, 
        hjust = 0.3), size = 3, position = dodge) + ggtitle("Marginal effect of E on Y") + 
        ylab("Regression coefficient") + xlab("Expression Tissue") + 
        facet_wrap(~gene.name, scales = "free_x", ncol = 3)
    subchunkify(p2, fig_asp = ncol)
    cat("\n\n")
    
    plot.i <- temp.container[temp.container$term == "I", ]
    ncol <- length(unique(plot.i[!is.na(plot.i$pval), "gene.name"]))/3
    if (ncol == floor(ncol)) {
        ncol <- floor(ncol)
    } else {
        ncol <- floor(ncol) + 1
    }
    dodge <- position_dodge(width = 0.5)
    p2 <- ggplot(plot.i[!is.na(plot.i$pval), ], aes(x = tissue, 
        color = type)) + geom_point(aes(y = estmate), position = dodge) + 
        geom_errorbar(aes(ymin = estmate - 1.96 * std, ymax = estmate + 
            1.96 * std), width = 0.1, position = dodge) + geom_hline(yintercept = 0, 
        color = "black") + coord_flip() + theme(axis.text.x = element_text(angle = 90, 
        hjust = 1)) + geom_text(aes(y = estmate, label = paste0("pval = ", 
        formatC(pval, digits = 2, format = "e")), vjust = -0.3, 
        hjust = 0.3), size = 3, position = dodge) + ggtitle("Marginal effect of I on Y") + 
        ylab("Regression coefficient") + xlab("Expression Tissue") + 
        facet_wrap(~gene.name, scales = "free_x", ncol = 3)
    subchunkify(p2, fig_asp = ncol)
    cat("\n\n")
    
    temp.container.nona <- temp.container[!is.na(temp.container$pval), 
        ]
    for (g in unique(temp.container.nona$gene.name)) {
        cat("###", g, "\n", "\n")
        plot.model2 <- temp.container.nona[temp.container.nona$type != 
            "Y ~ E + I" & temp.container.nona$gene.name == g, 
            ]
        if (nrow(plot.model2) == 0) 
            next
        ratio <- length(unique(plot.model2[!is.na(plot.model2$pval), 
            "tissue"]))/12
        p2 <- ggplot(plot.model2[!is.na(plot.model2$pval), ], 
            aes(x = tissue)) + geom_point(aes(y = estmate)) + 
            geom_errorbar(aes(ymin = estmate - 1.96 * std, ymax = estmate + 
                1.96 * std), width = 0.1) + geom_hline(yintercept = 0, 
            color = "black") + coord_flip() + theme(axis.text.x = element_text(angle = 90, 
            hjust = 1)) + geom_text(aes(y = estmate, label = paste0("pval = ", 
            formatC(pval, digits = 2, format = "e")), vjust = -0.7, 
            hjust = 0.5), size = 3, position = dodge) + ggtitle("Logistic regression with interaction") + 
            ylab("Regression coefficient") + xlab("Expression Tissue") + 
            facet_wrap(~term, scales = "free_x", ncol = 4)
        subchunkify(p2, fig_asp = ratio)
        cat("\n\n")
    }
    cat("\n\n")
}

0.001

UHRF1BP1

RAB38

UHRF1BP1L

ANKS1A

HRH1

LGR5

TSPAN8

0.0003

UHRF1BP1

RAB38

UHRF1BP1L

ANKS1A

HRH1

LGR5

TSPAN8

0.0001

UHRF1BP1

RAB38

UHRF1BP1L

ANKS1A

HRH1

LGR5

TSPAN8